import re
from urllib import parse

import requests
from lxml import etree
# Request headers shared by every HTTP call in this module; the User-Agent
# string identifies the client as Mobile Safari on an iPhone.
headers = {
    "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1",
}
def request_list_page():
    """Fetch job-listing pages and parse the detail page of each position.

    POSTs the search form (keyword ``python``) to the ``positionAjax.json``
    endpoint for page numbers 1 through 13, reads the position list out of
    the JSON reply and hands each position's detail-page URL to
    ``parse_position_detail``.

    NOTE(review): both loops ``break`` after their first iteration, so only
    the first position of the first page is processed. This looks like a
    debugging limit -- remove the two ``break`` statements to crawl every
    position on every page.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
        KeyError: if the JSON reply lacks the expected
            ``content`` / ``positionResult`` / ``result`` structure.
    """
    # Must be a plain string: a trailing comma after the literal would turn
    # it into a 1-tuple, which requests cannot use as a URL.
    url = (
        'https://www.lagou.com/jobs/positionAjax.json'
        '?px=default&city=%E5%8C%97%E4%BA%AC&needAddtionalResult=false'
    )

    for page in range(1, 14):
        form = {
            'first': 'true',
            'pn': page,
            'kd': 'python',
        }
        # A timeout keeps an unresponsive server from hanging the script.
        res = requests.post(url, headers=headers, data=form, timeout=10)
        result = res.json()
        positions = result['content']['positionResult']['result']
        for position in positions:
            # Index the current item, not the enclosing list.
            position_id = position['positionId']
            position_url = 'https://www.lagou.com/jobs/%s.html' % position_id
            parse_position_detail(position_url)
            break
        break

def parse_position_detail(url):
    """Download one position detail page and extract its key fields.

    Args:
        url: Address of the position detail page to fetch.

    Returns:
        A dict with the keys ``name``, ``salary``, ``city`` and ``desc``
        holding the text scraped from the page.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
        IndexError: if the page lacks any of the expected elements.
    """
    # A timeout keeps an unresponsive server from hanging the script.
    res = requests.get(url, headers=headers, timeout=10)
    html = etree.HTML(res.text)

    position_name = html.xpath("//span[@class='name']/text()")[0]

    # The first span under dd.job_request is read as the salary and the
    # second as the city.
    job_request_spans = html.xpath("//dd[@class='job_request']//span")
    salary = job_request_spans[0].xpath(".//text()")[0].strip()
    city = job_request_spans[1].xpath(".//text()")[0].strip()
    # Remove all whitespace and "/" characters from the city text.
    city = re.sub(r"[\s/]", "", city)

    desc = "".join(html.xpath("//dd[@class='job_bt']//text()"))

    return {
        'name': position_name,
        'salary': salary,
        'city': city,
        'desc': desc,
    }

# Script entry point: start the crawl. There is no __main__ guard, so this
# call also runs whenever the module is imported.
request_list_page()